# Load and clean data ----------------------------------------------------------

ent_df <- 
  readr::read_csv(
    here::here("Data", "Enterprise-Data",
               "Ent data CLEAN FULL - with wattage and consumption.csv"))

# Assign NAs

ent_df[ent_df == "99"] <- NA

# Set names for enterprise types 

entnames <- tibble::tribble(
  ~ent_label,                                    ~ent_type,    ~ent_cat,
  1,                               "Groceries", "Food",
  2,                          "Fruits", "Food",
  3,                                "Sweets", "Food",
  4,     "Clothes",     "Retail",
  5,                 "Electronics",     "Retail",
  6,      "Agricultural",     "Retail",
  7,                              "Medical",     "Retail",
  8,                        "MobileRepair",     "Digital",
  9, "CyberCafe",   "Digital",
  10,                               "Photostudio",   "Digital",
  11,                       "Tailoring",   "Trades",
  12,                             "BeautyParlour",    "Trades",
  13,                       "Carpentry",   "Trades",
  14,      "BicycleRepair",             "Trades",
  15,           "CarScooterRepair",             "Trades",
  16,   "Clinic",    "Health",
  17,               "FlourOilMill",     "Misc",
  18,                         "Warehouse",   "Misc",
  19,                               "ColdStorage",   "Agri",
  20,                 "MilkChilling",   "Agri",
  21,                                     "Others",    "Misc"
)

srv_types <- tibble::tribble(
  ~Appliance,        ~Service,
  "num_mobile",             "-",
  "num_bulb",         "Light",
  "num_CFL",         "Light",
  "num_LED",         "Light",
  "num_tubelight",         "Light",
  "num_ceilingfan",       "Cooling",
  "num_tablefan",       "Cooling",
  "num_cooler",       "Cooling",
  "num_tv",           "ICT",
  "num_computer",           "ICT",
  "num_printer",           "ICT",
  "num_sewingmc",          "Mechanical",
  "num_weighingmc",          "Mechanical",
  "num_flourmill",          "Mechanical",
  "num_fridge", "Refrigeration",
  "num_others",         "Other"
)

# Set service levels
srv_levels <- c("srv_light", "srv_spcl", "srv_ict", "srv_frdg", "srv_mech", "srv_none")
cat_levels <- c("Trade", "Services", "Manufacturing")

# CREATE MISSING VARIABLES -----------------------------------------------------

# Remove cases where mills are not electrified and using mechanical power from a generator directly
ent_df <- ent_df %>% 
  mutate(flourmill_watt_new =
           ifelse(q301_grid_use == 0 & q345_dg_use == 0 & q335_mgrid_use == 0, 0, 
                  flourmill_watt_new))
                  
# Enterprise names
ent_df <- ent_df %>% 
  left_join(entnames, by = c("q208_shop_type" = "ent_label"))

ent_df <- ent_df %>% 
  mutate(
    q501_R_a_num_othmech = case_when(
      q501_P_Others_others %in% c("Mixer/Juicer", "Air compressor", "Kaiya",
                                  "Submersible pump", "Trimmer", "Randa machine",
                                  "Electric Motor", "Grinder", "Cutter", 
                                  "Flour mill", "Weighing machine",
                                  "Welding machine", "Soldering Iron", 
                                  "Hot air gun", "Electric iron", "Lamination machine",
                                  "Oven", "Heater") ~ q501_P_a_num_others,
      TRUE ~ NA_real_),
    q501_S_a_num_othspcl = case_when(
      q501_P_Others_others %in% c("Air conditioner") ~ q501_P_a_num_others,
      TRUE ~ NA_real_),
    q501_T_a_num_othict = case_when(
      q501_P_Others_others %in% c("Music system", "Radio", 
                                  "Different types of electronic machine",
                                  "Battery charger", 
                                  "Mobile repairing kit") ~ q501_P_a_num_others,
      TRUE ~ NA_real_),
    q501_U_a_num_othlight = case_when(
      q501_P_Others_others %in% c("Rechargeble emergency light") ~ q501_P_a_num_others,
      TRUE ~ NA_real_)
  ) %>% 
  mutate(
    q501_R_b_watt_othmech = case_when(
      q501_R_a_num_othmech > 0 ~ q501_P_b_watt_others * 0.25, # Set a load factor of .25 for mechanical loads to represent on-off switching and use
      TRUE ~ NA_real_),
    q501_R_c_hrs_othmech = case_when(
      q501_R_a_num_othmech > 0 ~ q501_P_e_hrs_others,
      TRUE ~ NA_real_),
    q501_S_b_watt_othspcl = case_when(
      q501_S_a_num_othspcl > 0 ~ q501_P_b_watt_others * 0.25, # Set a load factor of .25 for air conditioner loads to represent on-off switching and use,
      TRUE ~ NA_real_),
    q501_S_c_hrs_othspcl = case_when(
      q501_S_a_num_othspcl > 0 ~ q501_P_e_hrs_others,
      TRUE ~ NA_real_),
    q501_T_b_watt_othict = case_when(
      q501_T_a_num_othict > 0 ~ q501_P_b_watt_others,
      TRUE ~ NA_real_),
    q501_T_c_hrs_othict = case_when(
      q501_T_a_num_othict > 0 ~ q501_P_e_hrs_others,
      TRUE ~ NA_real_),
    q501_U_b_watt_othlight = case_when(
      q501_U_a_num_othlight > 0 ~ q501_P_b_watt_others,
      TRUE ~ NA_real_),
    q501_U_c_hrs_othlight = case_when(
      q501_U_a_num_othlight > 0 ~ q501_P_e_hrs_others,
      TRUE ~ NA_real_)
  )

# Corrected appliance wattages

ent_df <- ent_df %>% 
  mutate(
    q501_A_b_watt_Incades_bulb = incadesbulb_watt_new,
    q501_B_b_watt_CFL = cfl_watt_new,
    q501_C_b_watt_LED = led_watt_new,
    q501_D_b_watt_tubelight = tubelight_watt_new,
    q501_G_b_watt_table_fan = 25,
    q501_F_b_watt_ceiling_fan = ceiling_fan_watt_new,
    q501_G_b_watt_table_fan = 50,
    q501_H_b_watt_tv = tv_watt_new,
    q501_I_b_watt_cooler = cooler_watt_new,
    q501_J_b_watt_fridge = 75,
    q501_K_b_watt_sewingmc = 50,
    q501_L_b_watt_computer = computer_watt_new,
    q501_M_b_watt_printer = 45,
    q501_N_b_watt_weighingmc = 5,
    q501_O_b_watt_flourmill = flourmill_watt_new
  )

# Appliance and electricity consumption dataframe

apps <- ent_df %>% 
  select(enterprise_id, matches("q501"), -matches("appl"), -matches("others")) %>% 
  select(enterprise_id, matches("hrs"), matches("watt"), matches("num")) %>% 
  select(enterprise_id, sort(colnames(.))) %>% 
  rename_all(~str_remove(., "\\w\\d\\d\\d\\w\\w\\w\\w\\w")) %>% 
  rename_all(~str_remove(., "Incades_")) %>% 
  rename_all(~str_replace(.,"ceiling_fan", "ceilingfan")) %>% 
  rename_all(~str_replace(.,"table_fan", "tablefan"))

# Energy service categories
ent_df <- apps %>% 
  group_by(enterprise_id) %>% 
  transmute(
    srv_light = sum(num_bulb, num_CFL, num_LED, num_tubelight, num_othlight, na.rm = T),
    srv_spcl= sum(num_ceilingfan,num_tablefan,num_cooler, num_othspcl, na.rm = T),
    srv_ict = sum(num_tv, num_computer, num_printer, num_othict, na.rm = T),
    srv_mech = sum(num_sewingmc, num_weighingmc, num_flourmill, num_othmech, na.rm = T),
    srv_frdg = sum(num_fridge, na.rm = T)
  ) %>% 
  right_join(ent_df, by = "enterprise_id")

# Monthly Electricity Consumption
ent_df <- apps %>% 
  select(-matches("num")) %>% 
  gather(app, value, -enterprise_id) %>% 
  arrange(enterprise_id) %>% 
  separate(app, sep = "_", into = c("type","app")) %>% 
  spread(type, value = value) %>% 
  mutate(watthours = hrs * watt) %>% 
  group_by(enterprise_id) %>% 
  summarise(monthly_consumption = round(sum(watthours, na.rm = T) * 30 / 1000,1)) %>% 
  right_join(ent_df, by = "enterprise_id")

# Total Appliance Wattage
ent_df <- apps %>% 
  select(-matches("hrs")) %>% 
  gather(app, value, -enterprise_id) %>% 
  arrange(enterprise_id) %>% 
  separate(app, sep = "_", into = c("type","app")) %>% 
  spread(type, value = value) %>% 
  mutate(watts = num * watt) %>% 
  group_by(enterprise_id) %>% 
  summarise(total_watts = round(sum(watts, na.rm = T),0)) %>% 
  right_join(ent_df, by = "enterprise_id")

# Energy service electricity consumption
ent_df <- apps %>% 
  select(-matches("num")) %>% 
  gather(app, value, -enterprise_id) %>% 
  arrange(enterprise_id) %>% 
  separate(app, sep = "_", into = c("type","app")) %>% 
  spread(type, value = value) %>% 
  mutate(watthours = hrs * watt,
         srv = case_when(
           app %in% c("bulb", "CFL", "LED", "tubelight", "othlight") ~ "srv_light",
           app %in% c("ceilingfan", "tablefan", "cooler", "othspcl") ~ "srv_spcl",
           app %in% c("tv", "computer", "printer", "othict") ~ "srv_ict",
           app %in% c("sewingmc", "weighingmc", "flourmill", "othmech") ~ "srv_mech",
           app %in% c("fridge") ~ "srv_frdg")
  ) %>% 
  group_by(enterprise_id, srv) %>% 
  dplyr::summarise(
    monthlykwh = round(sum(watthours, na.rm = T) * 30 / 1000,1)) %>% 
  spread(srv, value = monthlykwh, fill = 0) %>% 
  rename_at(vars(matches("srv")), ~paste0(.,"_kwh")) %>% 
  right_join(ent_df, by = "enterprise_id")

# Total watts for each service
ent_df <- apps %>% 
  select(-matches("hrs")) %>% 
  gather(app, value, -enterprise_id) %>% 
  arrange(enterprise_id) %>% 
  separate(app, sep = "_", into = c("type","app")) %>% 
  spread(type, value = value) %>% 
  mutate(watts = num * watt,
         srv = case_when(
           app %in% c("bulb", "CFL", "LED", "tubelight", "othlight") ~ "srv_light",
           app %in% c("ceilingfan", "tablefan", "cooler", "othspcl") ~ "srv_spcl",
           app %in% c("tv", "computer", "printer", "othict") ~ "srv_ict",
           app %in% c("sewingmc", "weighingmc", "flourmill", "othmech") ~ "srv_mech",
           app %in% c("fridge") ~ "srv_frdg")
  ) %>% 
  group_by(enterprise_id, srv) %>% 
  dplyr::summarise(
    totalwatts = round(sum(watts, na.rm = T),1)) %>% 
  filter(!is.na(srv)) %>% 
  spread(srv, value = totalwatts, fill = 0) %>% 
  rename_at(vars(matches("srv")), ~paste0(.,"_watts")) %>% 
  right_join(ent_df, by = "enterprise_id")

# Daily supply hours (any source)
ent_df <- ent_df %>% 
  select(enterprise_id, matches("hours")) %>% 
  group_by(enterprise_id) %>% 
  transmute(duration_day = pmax(q308_grid_hours,
                                q322_shs_hours,
                                q331_battery_hours,
                                q341_mgrid_hours,
                                q346_dg_hours,
                                na.rm = T)) %>% 
  mutate(duration_day = ifelse(is.na(duration_day),0,duration_day)) %>% 
  ungroup() %>% 
  right_join(ent_df, by = "enterprise_id")

# Use electricity
ent_df$useelec <- ifelse(ent_df$monthly_consumption > 0,1,0)

# Load in village questionnaire ------------------------------------------------

vill_df <- read_delim(here("Data","Community-Survey", "Village data CLEAN FULL 18July.csv"), 
                     delim = ",")

vill_df <- vill_df %>% 
  select(q011_village_code, q009_district, q304_total_households, q305_bpl_households, q501_num_markets, 
         q701_12_railway_station,q401_grid_use, q411a_grid_hours_winter,
         q411b_grid_hours_summer, q406_hamlets_grid, q306_hamlets, q421_mgrid_present, q413_grid_public_private) %>% 
  mutate(perc_bpl = round(q305_bpl_households / q304_total_households,2)) %>% 
  mutate_all(~ifelse(. == 99, NA, .)) %>% 
  transmute(q009_district = q009_district,
            q011_village_code = q011_village_code,
            vill_hh = q304_total_households,
            vill_bpl = perc_bpl,
            vill_markets = q501_num_markets,
            vill_rail = q701_12_railway_station,
            vill_grid = q401_grid_use,
            vill_grid_hrs_win = q411a_grid_hours_winter,
            vill_grid_hrs_sum = q411b_grid_hours_summer,
            vill_grid_hrs = (q411a_grid_hours_winter + q411b_grid_hours_summer)/2,
            vill_grid = ifelse(vill_grid == 1,1,0),
            vill_grid_hrs = ifelse(vill_grid == 0, 0, vill_grid_hrs),
            vill_grid_all = q406_hamlets_grid / q306_hamlets,
            vill_grid_all = ifelse(vill_grid_all >= 1 & !is.na(vill_grid_all), 1, 0),
            vill_mg = as.numeric(q421_mgrid_present == 1 | is.na(q421_mgrid_present)), # The 99  was an MG intervention village
            vill_grid_df = as.numeric(q413_grid_public_private == 0),
            vill_grid_gov = as.numeric(q413_grid_public_private == 1),
            vill_elec_type = case_when(vill_mg == 1 ~ "Grid (Public) + Minigrid",
                                       vill_grid_df == 1 ~ "Grid (Franchise)",
                                       TRUE ~ "Grid (Public)")) %>% 
  left_join(readRDS(here("Data", "Spatial", "timetocity.rds")) %>% 
              tbl_df() %>% select(-geometry))
  
ent_df <- left_join(ent_df, vill_df)

# Remove three non-minigrid / non-grid villages
ent_df <- ent_df %>% 
  filter(!is.na(vill_grid_hrs), vill_grid_hrs > 0)

vill_df <- vill_df %>% 
  filter(!is.na(vill_grid_hrs), vill_grid_hrs > 0, 
         q011_village_code %in% unique(ent_df$q011_village_code))

# Final corrections for modelling / visualusation ------------------------------

ent_df <- ent_df %>% 
  mutate_at(vars(ends_with("kwh")),
            .funs = c(dummy = ~as.numeric(. > 0))) %>% 
  mutate(
    q214_pucca = as.numeric(q214_building_type == 1),
    q213_own = as.numeric(q213_shop_owned == 1),
    q215_area = q215_shop_length * q215_shop_width,
    log_monthly_consumption = log(monthly_consumption + 1),
    log_monthly_income = log(q220_mon_shop_income + 1),
    srvs = factor(case_when(srv_light_kwh_dummy == 1 & srv_spcl_kwh_dummy == 0 & 
                              srv_ict_kwh_dummy == 0 & srv_frdg_kwh_dummy == 0 & 
                              srv_mech_kwh_dummy == 0 ~ "Only light",
                            (srv_light_kwh_dummy == 1 | srv_spcl_kwh_dummy == 1) & 
                              srv_ict_kwh_dummy == 0 & srv_frdg_kwh_dummy == 0 & 
                              srv_mech_kwh_dummy == 0 ~ "Light and fans",
                            srv_ict_kwh_dummy == 1 | srv_frdg_kwh_dummy == 1 | 
                              srv_mech_kwh_dummy == 1 ~ "Light, fans and others",
                            TRUE ~ "None"), 
                  levels = c("None", "Only light", "Light and fans", "Light, fans and others")),
    srvs_none = ifelse(srvs == "None",1,0),
    srvs_light = ifelse(srvs == "Only light",1,0),
    srvs_fans = ifelse(srvs == "Light and fans",1,0),
    srvs_oths = ifelse(srvs == "Light, fans and others",1,0),
    q210_salaried_employees = ifelse(is.na(q210_salaried_employees),0,q210_salaried_employees))

# Remove 12 observations with key missing values 
ent_df <- ent_df %>% 
  filter(!is.na(q301_grid_use), !is.na(q215_area))

# Define electricity source
ent_df <- ent_df %>% 
  mutate(
    elecsrc = case_when(
      q301_grid_use == 1 & 
        ((select(.,q335_mgrid_use, q345_dg_use, q317_shs_use, q412_sl_use, q324_battery_use) %>% 
            rowSums(na.rm = T)) == 0) ~ "Grid",
      q301_grid_use == 1 & 
        ((select(.,q335_mgrid_use, q345_dg_use, q317_shs_use, q412_sl_use, q324_battery_use) %>% 
            rowSums(na.rm = T)) > 0) ~ "Grid + Off-grid",
      q301_grid_use == 0 & 
        ((select(.,q335_mgrid_use, q345_dg_use, q317_shs_use, q412_sl_use, q324_battery_use) %>% 
            rowSums(na.rm = T)) > 0) ~ "Off-grid",
      TRUE ~ "None"),
    elecsrc = factor(elecsrc, levels = c("Grid", "Grid + Off-grid", "Off-grid", "None")),
    elecsrc_gridonly = as.numeric(q301_grid_use == 1 & 
                                ((select(.,q335_mgrid_use, q345_dg_use, q317_shs_use, q412_sl_use, q324_battery_use) %>% 
                                    rowSums(na.rm = T)) == 0)),
    elecsrc_gridogonly = as.numeric(q301_grid_use == 1 & 
                                  ((select(.,q335_mgrid_use, q345_dg_use, q317_shs_use, q412_sl_use, q324_battery_use) %>% 
                                      rowSums(na.rm = T)) > 0)),
    elecsrc_ogonly = as.numeric(q301_grid_use == 0 & 
                              ((select(.,q335_mgrid_use, q345_dg_use, q317_shs_use, q412_sl_use, q324_battery_use) %>% 
                                  rowSums(na.rm = T)) > 0)),
    elecsrc_none = as.numeric(q301_grid_use == 0 & 
                                ((select(.,q335_mgrid_use, q345_dg_use, q317_shs_use, q412_sl_use, q324_battery_use) %>% 
                                    rowSums(na.rm = T)) == 0)),
    elecsrc_gridany = as.numeric(q301_grid_use == 1),
    elecsrc_ogany = as.numeric((select(.,q335_mgrid_use, q345_dg_use, q317_shs_use, q412_sl_use, q324_battery_use) %>% 
                                  rowSums(na.rm = T)) > 0)
  )
